#!/usr/bin/env python

"""
Print a CSV data file to stdout: a header line followed by 2000 lines of random data, such as:

    Age,Gender,BMI,Weight,Years Smoking
    45,M,25.2,189,0
    34,F,20.0,120,2
...
"""

from __future__ import with_statement, division

from random import gauss, expovariate, uniform, random

# Emit the CSV header row. The call form with a single parenthesized string
# prints the same text under both Python 2 and Python 3, whereas the bare
# print statement is a syntax error on Python 3.
print("Age,Gender,BMI,Weight,Years Smoking")

def random_gender():
    """Return the string "M" when a uniform draw falls below 0.48, else "F"."""
    if random() < .48:
        return "M"
    return "F"

def random_age():
    """Return an integer age: a uniform draw over 21..81, truncated by int()."""
    youngest, oldest = 21, 81
    drawn = uniform(youngest, oldest)
    return int(drawn)

def random_weight(gender):
    """Return an integer weight drawn from a clamped normal distribution.

    For gender "M" the draw is gauss(190, 30) clamped to [120, 300]; for any
    other value it is gauss(140, 25) clamped to [90, 250]. The clamped value
    is truncated to an int.
    """
    if gender == "M":
        mean, spread, lightest, heaviest = 190, 30, 120, 300
    else:
        mean, spread, lightest, heaviest = 140, 25, 90, 250

    drawn = gauss(mean, spread)
    # Cap at the upper bound first, then raise to the lower bound.
    clamped = max(min(drawn, heaviest), lightest)
    return int(clamped)

def random_smoking():
    """Return a non-negative integer count of years smoking.

    The value is an exponential draw with rate 1.5, truncated by int().
    """
    rate = 1.5
    drawn = expovariate(rate)
    return int(drawn)

def random_bmi(gender, weight, smoking):
    """Return a random float BMI that grows with weight and smoking.

    The result is the sum of three terms, drawn in this order:
      * a gauss(20, 3) baseline clamped to [13, 41],
      * weight scaled by a uniform [0, 1) factor and divided by 100,
      * smoking scaled by a second uniform [0, 1) factor.

    The gender argument is accepted but not used in the computation.
    """
    baseline = min(gauss(20, 3), 41)
    baseline = max(baseline, 13)
    weight_term = weight * random() / 100
    smoking_term = smoking * random()
    return baseline + weight_term + smoking_term

def random_sample():
    """Return one random record as (age, gender, bmi, weight, smoking).

    Values are drawn in the order gender, age, smoking, weight, bmi; weight
    depends on the gender, and bmi on the gender, weight and smoking values.
    """
    sex = random_gender()
    years_old = random_age()
    years_smoking = random_smoking()
    body_weight = random_weight(sex)
    body_mass_index = random_bmi(sex, body_weight, years_smoking)
    return (years_old, sex, body_mass_index, body_weight, years_smoking)

# Emit 2000 CSV data rows, one freshly generated sample per line.
# range() and the call form of print with a single string argument behave
# the same on Python 2 and Python 3; xrange and the print statement exist
# only on Python 2.
for _ in range(2000):
    # BMI is formatted to one decimal place; the other numeric fields are ints.
    print("%d,%s,%.1f,%d,%d" % random_sample())

